# -*- coding: utf-8 -*-
import urllib2
from bs4 import BeautifulSoup

# Entry page of the Beijing second-hand housing listings; main() passes it
# to parseArea().
ershoufang_url = "http://bj.lianjia.com/ershoufang/"
# Example district page. Not referenced by the code visible in this file.
subCity_url = "http://bj.lianjia.com/ershoufang/dongcheng/"

# Scrape results, keyed by the link text of each area.
# citys is filled by parseArea(), subcitys by parseSubArea().
citys = dict()
subcitys = dict()
# NOTE(review): module-level counter; parseArea() uses its own local counter
# and nothing visible here reads this one -- confirm before removing.
i = 1


def createAreaTable():
    """Print the SQL preamble for the ``tb_area`` table to stdout.

    Two lines are written: the ``CREATE TABLE`` statement and the opening
    of a multi-row ``insert into ... values`` statement. No value rows are
    printed by this function.
    """
    # Adjacent literals are concatenated by the parser; the pieces below
    # reproduce the statement exactly, including its irregular spacing.
    create_stmt = (
        "CREATE TABLE `tb_area` ( "
        "`id` int(11) NOT NULL , "
        "`name` varchar(255) DEFAULT NULL, "
        "`label` varchar(255) DEFAULT NULL,"
        "`link` varchar(255) DEFAULT NULL,"
        "`parent` int(11) DEFAULT NULL,"
        "`order` int(11) DEFAULT NULL,"
        "`level` int(11) DEFAULT NULL,"
        "PRIMARY KEY (`id`)) "
        "ENGINE=InnoDB  DEFAULT CHARSET=utf8;"
    )
    insert_head = (
        "insert into `tb_area`"
        "(`id`,`name`,`label`,`link`,`parent`,`order`,`level`) values"
    )
    for statement in (create_stmt, insert_head):
        print(statement)

def parseArea(url):
    """Scrape the district links from *url* and record them in ``citys``.

    The page is expected to contain a ``<div data-role="ershoufang">`` whose
    anchors each point at one district. For every anchor that carries an
    ``href`` a tuple

        (id, slug, link text, href, 0, order, 2)

    is stored in the module-level ``citys`` dict under the link text, where
    ``slug`` is the href with ``/ershoufang/`` and all slashes removed and
    ``id``/``order`` are the 1-based position of the link. The field order
    matches the column list printed by ``createAreaTable``. The district
    page is then handed to ``parseSubArea`` with that id as parent.

    :param url: address of the listing page to scrape.
    :raises IndexError: if the page has no ``data-role="ershoufang"`` div.
    """
    # Local import: resolves each href against the page URL so that both
    # site-relative and absolute links yield a valid address.
    from urlparse import urljoin

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(content, "html.parser")
    container = soup.find("div", attrs={"data-role": "ershoufang"})
    if container is None:
        # Same exception type as the former div[0] lookup, but with a
        # message that says what is actually wrong.
        raise IndexError("no <div data-role=\"ershoufang\"> found at %r" % (url,))

    # href=True skips anchors without a target, which would otherwise fail
    # on the .replace() calls below.
    for order, link in enumerate(container.find_all("a", href=True), 1):
        href = link.get("href")
        cityId = order  # ids and display order both count up from 1
        slug = href.replace("/ershoufang/", "").replace("/", "")
        citys[link.text] = (cityId, slug, link.text, href, 0, order, 2)
        parseSubArea(urljoin(url, href), cityId)


def parseSubArea(url,parent):
    """Scrape the sub-area links of one district page into ``subcitys``.

    The sub-area anchors are taken from the second ``<div>`` nested inside
    the page's ``<div data-role="ershoufang">``. For every anchor that
    carries an ``href`` a tuple

        (-1, slug, link text, href, parent, order, 3)

    is stored in the module-level ``subcitys`` dict under the link text,
    where ``slug`` is the href with ``/ershoufang/`` and all slashes removed
    and ``order`` is the 1-based position of the link on this page.

    If the page has no such sub-area list, a message is written to stderr
    and nothing is recorded, so one district without sub-areas does not
    abort the whole crawl.

    :param url: address of the district page to scrape.
    :param parent: id of the district the sub-areas belong to.
    """
    # Local import: diagnostics go to stderr so stdout stays clean.
    import sys

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    soup = BeautifulSoup(content, "html.parser")
    container = soup.find("div", attrs={"data-role": "ershoufang"})
    sections = container.find_all("div") if container is not None else []
    if len(sections) < 2:
        sys.stderr.write("no sub-area list found at %r\n" % (url,))
        return

    # href=True skips anchors without a target, which would otherwise fail
    # on the .replace() calls below.
    for subOrder, link in enumerate(sections[1].find_all("a", href=True), 1):
        href = link.get("href")
        slug = href.replace("/ershoufang/", "").replace("/", "")
        # NOTE(review): every sub-area is stored with id -1, and entries are
        # keyed by link text only, so a name shared by two districts keeps
        # just the last one seen -- confirm this is intended.
        subcitys[link.text] = (-1, slug, link.text, href, parent, subOrder, 3)
def printArea():
    """Print every collected record, one tuple per line.

    All entries of ``citys`` are written first, followed by all entries of
    ``subcitys``, each in its dict's iteration order.
    """
    for table in (citys, subcitys):
        for record in table.values():
            print(record)
def main():
    """Scrape the areas reachable from ``ershoufang_url`` and print the collected records."""
    parseArea(ershoufang_url)
    printArea()
# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
